import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import librosa
import librosa.display
import soundfile
import random
from IPython.display import display, Audio
# Sample sin(t) on [0, 16) with a step of 1/100.
t = np.arange(0, 16, 1/100)
sig = np.sin(t)
# Draw the signal and a red zero line spanning the whole time axis.
plt.plot(t, sig)
plt.hlines(y=0, xmin=t.min(), xmax=t.max(), colors='r');
../_images/wavesets2_3_0.png
# np.sign maps every sample to -1, 0 or +1.
# Comparing the sign of each sample with the sign of its successor marks
# the positions where the sign increases, which gives us the crossings.
signs = np.sign(sig)
crossings = np.flatnonzero(signs[:-1] < signs[1:])
crossings
array([   0,  628, 1256])
# Optional: cut the signal into pieces at the detected crossings
# (the result is not kept, the trailing semicolon hides the output).
np.split(sig, crossings);
plt.plot(t, sig)
plt.hlines(y=0, xmin=t.min(), xmax=t.max(), colors='r')
# Crossings are sample indices; dividing by 100 (the number of samples per
# time unit) places the green markers on the time axis.
plt.vlines(crossings / 100, ymin=sig.min(), ymax=sig.max(), colors='g');
../_images/wavesets2_6_0.png
def separate_wavesets(array, min_length):
    """Split a signal into chunks at negative -> non-negative zero crossings.

    A new chunk starts at sample ``i`` when ``array[i - 1] < 0`` and
    ``array[i] >= 0`` and the chunk collected so far holds at least
    ``min_length`` samples.

    Parameters
    ----------
    array : sequence of numbers or np.ndarray
        The one-dimensional signal to split.
    min_length : int
        Minimum number of samples a chunk must contain before it may be
        closed at a crossing.

    Returns
    -------
    list of np.ndarray
        The chunks in their original order; concatenating them gives back
        the input. An empty input yields an empty list.
    """
    samples = np.asarray(array)
    if samples.size == 0:
        return []

    # np.append returns a new array instead of growing its argument, so the
    # chunks are collected in a plain Python list and cut out by slicing.
    chunks = []
    chunk_start = 0
    for i in range(1, len(samples)):
        crosses_upwards = samples[i - 1] < 0 and samples[i] >= 0
        if crosses_upwards and i - chunk_start >= min_length:
            chunks.append(samples[chunk_start:i])
            chunk_start = i
    # Whatever follows the last crossing forms the final chunk.
    chunks.append(samples[chunk_start:])
    return chunks

Finding wavesets in a sound file

# Audio file that is analysed in the following cells.
path = 'violin.flac'
# Load the file as mono at 44.1 kHz; `data` receives the samples and `sr`
# the sampling rate returned by librosa.
data, sr = librosa.load(path, sr=44100, mono=True)
# Embed a player for the unprocessed file.
display(Audio(path))
def wavesets_breakpoints(array: np.ndarray, min_length: int) -> list:
    """Return the sample indices at which a new waveset starts.

    A breakpoint is placed at index ``i`` when ``array[i] > 0`` and
    ``array[i - 1] <= 0`` (an upward zero crossing) and ``i`` lies at least
    ``min_length`` samples after the previous breakpoint.

    Parameters
    ----------
    array : np.ndarray
        One-dimensional signal.
    min_length : int
        Minimum distance, in samples, between two consecutive breakpoints.

    Returns
    -------
    list of int
        Breakpoint indices in ascending order. The list always starts
        with 0, also for an empty or single-sample input.
    """
    samples = np.asarray(array)
    indices = [0]
    if samples.size < 2:
        # No pair of neighbouring samples, hence no crossing to find.
        return indices

    # Find every upward crossing in one vectorised pass; entry k of the
    # mask compares samples k and k + 1, so the crossing index is k + 1.
    rising = (samples[1:] > 0.0) & (samples[:-1] <= 0.0)
    candidates = np.flatnonzero(rising) + 1

    # The minimum-length rule depends on the previously accepted
    # breakpoint, so the candidates are filtered sequentially.
    for position in candidates.tolist():
        if position - indices[-1] >= min_length:
            indices.append(position)
    return indices
def separate_wavesets(array: np.ndarray, min_length: int):
    """Cut *array* into wavesets and return them as a list of arrays.

    The cut positions come from ``wavesets_breakpoints(array, min_length)``.
    The first piece produced by ``np.split`` is left out of the result.
    """
    breakpoints = wavesets_breakpoints(array, min_length)
    pieces = np.split(array, breakpoints)
    # Everything except the leading piece is returned.
    return pieces[1:]
# Split the loaded audio into wavesets, using a minimum length of 128 samples.
wavesets_data = separate_wavesets(data, 128)
# Show the samples of the second waveset.
wavesets_data[1]
array([ 1.6326904e-03,  3.0364990e-03,  4.3487549e-03,  5.5694580e-03,
        6.1340332e-03,  6.4086914e-03,  6.8206787e-03,  7.2326660e-03,
        6.9732666e-03,  5.8135986e-03,  4.3182373e-03,  2.7618408e-03,
        6.4086914e-04, -1.6326904e-03, -3.6468506e-03, -5.0506592e-03,
       -5.9509277e-03, -6.5612793e-03, -6.5460205e-03, -5.7830811e-03,
       -4.6234131e-03, -3.2806396e-03, -2.6550293e-03, -2.4871826e-03,
       -1.7547607e-03,  9.1552734e-05,  2.4108887e-03,  4.6386719e-03,
        6.3629150e-03,  6.6833496e-03,  5.7830811e-03,  4.5318604e-03,
        3.7841797e-03,  3.2806396e-03,  2.9602051e-03,  2.7008057e-03,
        2.1972656e-03,  1.0070801e-03, -6.7138672e-04, -2.1972656e-03,
       -3.1890869e-03, -4.2572021e-03, -5.2337646e-03, -5.3253174e-03,
       -5.2185059e-03, -5.2795410e-03, -5.0659180e-03, -4.5928955e-03,
       -4.3945312e-03, -3.9520264e-03, -2.2888184e-03,  4.5776367e-05,
        2.6702881e-03,  4.6386719e-03,  5.6762695e-03,  5.6457520e-03,
        4.9285889e-03,  4.2266846e-03,  4.3792725e-03,  4.6081543e-03,
        4.5623779e-03,  3.8146973e-03,  2.3193359e-03,  1.5258789e-04,
       -1.6784668e-03, -2.8076172e-03, -3.4027100e-03, -4.1046143e-03,
       -5.1879883e-03, -6.4544678e-03, -7.1563721e-03, -6.9122314e-03,
       -5.9967041e-03, -5.1422119e-03, -4.3945312e-03, -3.5400391e-03,
       -2.7160645e-03, -1.8920898e-03, -7.7819824e-04,  6.8664551e-04,
        1.8310547e-03,  2.1362305e-03,  2.3803711e-03,  3.0364990e-03,
        3.6468506e-03,  3.9978027e-03,  3.9978027e-03,  3.6315918e-03,
        2.9907227e-03,  2.8533936e-03,  3.2958984e-03,  3.1127930e-03,
        1.7089844e-03, -3.6621094e-04, -2.3040771e-03, -3.8299561e-03,
       -4.7607422e-03, -5.0354004e-03, -4.7607422e-03, -4.6081543e-03,
       -4.8522949e-03, -5.2490234e-03, -4.9438477e-03, -3.7384033e-03,
       -2.7160645e-03, -1.8768311e-03, -8.0871582e-04,  4.5776367e-04,
        1.4495850e-03,  1.6326904e-03,  1.9073486e-03,  1.8310547e-03,
        8.3923340e-04, -2.5939941e-04, -3.0517578e-05,  1.0681152e-03,
        1.9683838e-03,  3.1738281e-03,  3.7689209e-03,  3.3721924e-03,
        1.8768311e-03,  7.6293945e-04,  5.6457520e-04,  3.3569336e-04,
       -3.5095215e-04, -1.2207031e-03, -2.0751953e-03, -2.8228760e-03,
       -3.3264160e-03, -2.5329590e-03, -5.6457520e-04], dtype=float32)
# Order the wavesets from shortest to longest (by number of samples).
wavesets_data_sorted = sorted(wavesets_data, key=lambda x: x.shape[0])
# Shape of the first waveset in the original, unsorted order.
wavesets_data[0].shape
(156,)
# Total number of wavesets found in the file.
len(wavesets_data)
8053
# Shape of the longest waveset (last element after sorting by length).
wavesets_data_sorted[-1].shape
(603,)
# Join the length-sorted wavesets into one continuous signal and play it.
flat = np.concatenate(wavesets_data_sorted)
display(Audio(flat, rate=sr))
# Total number of samples after concatenation.
flat.shape
(1218263,)
# Repeat every sample three times in a row, so the signal becomes three
# times as long. The result is stored as float64.
flat_stretched = np.repeat(flat, 3).astype(np.float64)
display(Audio(flat_stretched, rate=sr))
# STFT settings: window length, hop size and FFT size are all 512 samples.
WIN_LENGTH = 512
HOP_LENGTH = 512
N_FFT = 512

# Compute one short-time Fourier transform per waveset.
wavesets_fft = [
    librosa.stft(item, n_fft=N_FFT, hop_length=HOP_LENGTH, win_length=WIN_LENGTH)
    for item in wavesets_data
]
/Volumes/data/git/musikinformatik-sose2021/venv/lib/python3.8/site-packages/librosa/core/spectrum.py:222: UserWarning: n_fft=512 is too small for input signal of length=326
  warnings.warn(
# NOTE(review): len() on a list is cheap; presumably the slowness comes from
# running one librosa.stft call per waveset beforehand — confirm by timing.
len(wavesets_fft) # why is this slow?
588

Rearranging wavesets

Random order

# Shuffle a shallow copy so that wavesets_data keeps its original order.
wavesets_data_shuffled = wavesets_data.copy()
random.shuffle(wavesets_data_shuffled)
# Chain the samples of the shuffled wavesets into one flat list.
data_shuffled = [item for sublist in wavesets_data_shuffled for item in sublist]
display(Audio(data_shuffled, rate=sr))
def flatten_wavesets(wavesets):
    """Concatenate the samples of all wavesets into one flat list.

    Parameters
    ----------
    wavesets : iterable of iterables
        The wavesets to join, e.g. a list of 1-D arrays.

    Returns
    -------
    list
        All samples of the given wavesets in their given order.
    """
    # Iterate over the argument, not over a module-level variable, so the
    # result reflects whatever list the caller passes in.
    return [item for sublist in wavesets for item in sublist]
## test: do we get back the original?
## this seems not quite correct
## NOTE(review): the result can only match the original if flatten_wavesets
## iterates over the list passed to it — verify before trusting this check.
original_data = flatten_wavesets(wavesets_data)
display(Audio(original_data, rate=sr))
## time stretching: every waveset is listed twice in a row
stretched_data = [item for item in wavesets_data for _ in range(2)]
flattened_data = flatten_wavesets(stretched_data)
display(Audio(flattened_data, rate=sr))
# Now we can write this a bit shorter.
# random.shuffle works in place and returns None, so it is called on its
# own line and the shuffled list itself is passed to flatten_wavesets.
wavesets_data_shuffled = wavesets_data.copy()
random.shuffle(wavesets_data_shuffled)
data_shuffled = flatten_wavesets(wavesets_data_shuffled)
display(Audio(data_shuffled, rate=sr))
## Play the wavesets in reverse order.
## list.reverse() reverses in place and returns None, so a reversed copy is
## built by slicing instead; wavesets_data keeps its original order.
reordered_wavesets = wavesets_data[::-1]
data_reversed = flatten_wavesets(reordered_wavesets)
display(Audio(data_reversed, rate=sr))